Importing Needed Packages

Importing the Data

# Import the raw consumption data (countries as rows, one column per year)
oil_consumption1 <- read_excel("data/oil-consumption-per-TBD.xlsx")

# Coerce the year columns (2..57) to numeric; non-numeric cells become NA
# (the "NAs introduced by coercion" warnings below are expected).
# lapply() keeps each column a proper vector; apply() on a data frame first
# coerces the whole thing to a character matrix, which is both wasteful and
# the reason as.character() was needed defensively.
vec <- seq(2, 57, 1)
oil_consumption1[vec] <- lapply(oil_consumption1[vec],
                                function(x) as.numeric(as.character(x)))
## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion
# Tidy: pivot the year columns into long format (one row per country-year)
oil_consumption1 <- oil_consumption1 %>% 
  pivot_longer(-c(country, per_region),
               names_to = "year",
               values_to = "oil_comsumption_in_EJ")

# Back-fill missing values within each country.
# Grouping matters: without it, fill() pulls a value across the boundary
# from the next country's first observation.
oil_consumption1 <- oil_consumption1 %>%
  group_by(country) %>%
  fill(oil_comsumption_in_EJ, .direction = "up") %>%
  ungroup()
sum(is.na(oil_consumption1))
## [1] 0
# head(oil_consumption1)
# Aggregate all countries to a world total per year and turn the result
# into an annual time series starting in 1965.
# Dividing by 1000 rescales thousand barrels/day to million barrels/day.
consumption_ts <- oil_consumption1 %>%
  group_by(year) %>%
  summarise(Consumption_in_MBD = sum(oil_comsumption_in_EJ) / 1000) %>%
  dplyr::select(Consumption_in_MBD) %>%
  ts(start = 1965, frequency = 1)

Exploring

# Interactive plot of world consumption with a draggable range selector
dygraph(consumption_ts, 
        main  = "World Oil Consumption",
        ylab  = "Consumption in Million Barrels Daily",
        xlab  = "Years") %>% 
  dyRangeSelector()
# ACF/PACF correlation plots of the series (TSstudio)
ts_cor(consumption_ts)# Illustrating the lag seasonality 

Time Series Models

# First difference removes the trend so the series is (closer to) stationary
dyy<- diff(consumption_ts) # taking the first difference of the time series to eliminate the trend and make it stationary
autoplot(dyy)

Seasonal Naive model

# Benchmark model: seasonal naive forecast on the differenced series
fit<- snaive(dyy) #Residual SD: 1.9801   
print(summary(fit))
## 
## Forecast method: Seasonal naive method
## 
## Model Information:
## Call: snaive(y = dyy) 
## 
## Residual sd: 1.9801 
## 
## Error measures:
##                      ME     RMSE      MAE       MPE     MAPE MASE      ACF1
## Training set -0.2076996 1.980146 1.219474 -10.20389 144.3203    1 -0.105236
## 
## Forecasts:
##      Point Forecast     Lo 80     Hi 80     Lo 95     Hi 95
## 2021      -9.120961 -11.65862 -6.583302 -13.00197 -5.239947
## 2022      -9.120961 -12.70975 -5.532170 -14.60954 -3.632378
checkresiduals(fit)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 6.7596, df = 10, p-value = 0.7479
## 
## Model df: 0.   Total lags used: 10

ETS Model

# Exponential smoothing state-space model, specification chosen automatically
fit_ets<- ets(consumption_ts) #Residual SD: 0.0258
print(summary(fit_ets))
## ETS(M,Ad,N) 
## 
## Call:
##  ets(y = consumption_ts) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.5875 
##     phi   = 0.8619 
## 
##   Initial states:
##     l = 33.516 
##     b = 2.1526 
## 
##   sigma:  0.0257
## 
##      AIC     AICc      BIC 
## 294.5272 296.2414 306.6793 
## 
## Training set error measures:
##                      ME     RMSE     MAE       MPE     MAPE      MASE
## Training set 0.03634579 1.851328 1.10657 0.1518389 1.631993 0.6544345
##                    ACF1
## Training set 0.05214623
checkresiduals(fit_ets)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(M,Ad,N)
## Q* = 6.477, df = 5, p-value = 0.2625
## 
## Model df: 5.   Total lags used: 10

ARIMA Model

# ARIMA with automatic order selection; d = 1 forces one difference
fit_arima<- auto.arima(consumption_ts, d=1) #Residual SD: 1.843909
print(summary(fit_arima))
## Series: consumption_ts 
## ARIMA(0,1,1) with drift 
## 
## Coefficients:
##          ma1   drift
##       0.5424  0.8684
## s.e.  0.1897  0.3753
## 
## sigma^2 estimated as 3.4:  log likelihood=-110.85
## AIC=227.7   AICc=228.17   BIC=233.72
## 
## Training set error measures:
##                        ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.006192155 1.793768 1.129508 0.1226427 1.687617 0.6680002
##                     ACF1
## Training set 0.007343122
checkresiduals(fit_arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1) with drift
## Q* = 2.9032, df = 8, p-value = 0.9403
## 
## Model df: 2.   Total lags used: 10

#AR model

library(vars)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:plotly':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## Loading required package: strucchange
## Loading required package: sandwich
## 
## Attaching package: 'strucchange'
## The following object is masked from 'package:stringr':
## 
##     boundary
## Loading required package: urca
## Loading required package: lmtest
## 
## Attaching package: 'vars'
## The following object is masked from 'package:tidyquant':
## 
##     VAR
library(mFilter)
library(tseries)
library(TSstudio)
library(forecast)
library(tidyverse)
VARselect(consumption_ts,lag.max = 10, type = "const")
## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      2      2      2      2 
## 
## $criteria
##               1        2        3        4        5        6        7        8
## AIC(n) 1.369862 1.299587 1.335281 1.378151 1.374155 1.406465 1.447702 1.485097
## HQ(n)  1.399645 1.344262 1.394848 1.452610 1.463505 1.510708 1.566836 1.619123
## SC(n)  1.449368 1.418846 1.494293 1.576917 1.612673 1.684737 1.765726 1.842875
## FPE(n) 3.935022 3.668461 3.802737 3.970983 3.957645 4.091239 4.268548 4.438020
##               9       10
## AIC(n) 1.526807 1.567175
## HQ(n)  1.675725 1.730984
## SC(n)  1.924338 2.004459
## FPE(n) 4.636026 4.838562
# Fit a fixed-order AR(2) to the differenced series, as suggested by VARselect.
# NOTE: ar() has no `p` or `type` arguments (that is vars::VAR syntax); the
# original call silently ignored both, so AIC chose the order instead of the
# user. aic = FALSE + order.max fits exactly the requested order.
AR <- ar(diff(consumption_ts), aic = FALSE, order.max = 2) # Residual SD = 1.916507
print(summary(AR)) 
##              Length Class  Mode     
## order         1     -none- numeric  
## ar            1     -none- numeric  
## var.pred      1     -none- numeric  
## x.mean        1     -none- numeric  
## aic          18     -none- numeric  
## n.used        1     -none- numeric  
## n.obs         1     -none- numeric  
## order.max     1     -none- numeric  
## partialacf   17     -none- numeric  
## resid        55     ts     numeric  
## method        1     -none- character
## series        1     -none- character
## frequency     1     -none- numeric  
## call          4     -none- call     
## asy.var.coef  1     -none- numeric
checkresiduals(AR)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

Forecasting Oil Consumption

I have chosen the ETS model since it has the lowest residual standard deviation.

# Forecast world consumption 10 years ahead with the chosen ETS model
fcs_cons <- forecast(fit_ets, h= 10) # forecasting 10 years 
autoplot(fcs_cons)

print(summary(fcs_cons))
## 
## Forecast method: ETS(M,Ad,N)
## 
## Model Information:
## ETS(M,Ad,N) 
## 
## Call:
##  ets(y = consumption_ts) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.5875 
##     phi   = 0.8619 
## 
##   Initial states:
##     l = 33.516 
##     b = 2.1526 
## 
##   sigma:  0.0257
## 
##      AIC     AICc      BIC 
## 294.5272 296.2414 306.6793 
## 
## Error measures:
##                      ME     RMSE     MAE       MPE     MAPE      MASE
## Training set 0.03634579 1.851328 1.10657 0.1518389 1.631993 0.6544345
##                    ACF1
## Training set 0.05214623
## 
## Forecasts:
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2021       84.05387 81.28489 86.82285 79.81908 88.28866
## 2022       80.24052 75.30143 85.17961 72.68684 87.79421
## 2023       76.95388 69.79343 84.11434 66.00291 87.90486
## 2024       74.12120 64.72408 83.51831 59.74955 88.49284
## 2025       71.67976 60.06634 83.29319 53.91856 89.44097
## 2026       69.57555 55.79087 83.36022 48.49371 90.65739
## 2027       67.76196 51.86680 83.65713 43.45240 92.07153
## 2028       66.19888 48.26311 84.13464 38.76849 93.62926
## 2029       64.85169 44.94975 84.75362 34.41431 95.28907
## 2030       63.69057 41.89822 85.48292 30.36205 97.01909
plot_forecast(fcs_cons)

As you can see, the forecast was misled by the pandemic. Therefore, I approached the problem from a different side: I eliminated the 2020 drop (caused by the pandemic) and forecast again — let's see what happens.

# Import the alternative dataset with the 2020 pandemic drop removed
oil_consumption2 <- read_excel("data/oil-consumption-per-TBD_2.xlsx")
# Coerce the year columns (2..56) to numeric; non-numeric cells become NA
# (coercion warnings below are expected).
# lapply() keeps column types; apply() on a data frame would first coerce
# everything to a character matrix.
vec <- seq(2, 56, 1)
oil_consumption2[vec] <- lapply(oil_consumption2[vec],
                                function(x) as.numeric(as.character(x)))
## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion

## Warning in FUN(newX[, i], ...): NAs introduced by coercion
# Tidy: pivot the year columns into long format (one row per country-year)
oil_consumption2 <- oil_consumption2 %>% 
  pivot_longer(-c(country, per_region),
               names_to = "year",
               values_to = "oil_comsumption_in_MBD")

# Back-fill missing values within each country (grouping prevents a value
# leaking across the boundary from the next country's first observation)
oil_consumption2 <- oil_consumption2 %>%
  group_by(country) %>%
  fill(oil_comsumption_in_MBD, .direction = "up") %>%
  ungroup()
# BUG FIX: verify the data frame we just filled (was oil_consumption1,
# a copy-paste slip that always reported the first dataset's NA count)
sum(is.na(oil_consumption2))
## [1] 0
# Aggregate the pandemic-adjusted data to world totals per year and convert
# to an annual time series starting in 1965 (/1000: thousand -> million
# barrels/day). The stray empty argument in the original ts() call is gone;
# the result is identical.
consumption_ts1 <- oil_consumption2 %>%
  group_by(year) %>%
  summarise(Consumption_in_MBD = sum(oil_comsumption_in_MBD) / 1000) %>%
  dplyr::select(Consumption_in_MBD) %>%
  ts(start = 1965, frequency = 1)
plot(consumption_ts1)

Seasonal Naive model

# Seasonal naive benchmark on the differenced, pandemic-adjusted series
fit1<- snaive(diff(consumption_ts1)) #Residual SD: 1.5193     
print(summary(fit1))
## 
## Forecast method: Seasonal naive method
## 
## Model Information:
## Call: snaive(y = diff(consumption_ts1)) 
## 
## Residual sd: 1.5193 
## 
## Error measures:
##                       ME     RMSE      MAE       MPE     MAPE MASE       ACF1
## Training set -0.03323201 1.519344 1.064096 -12.35221 145.0876    1 -0.2414877
## 
## Forecasts:
##      Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## 2020      0.3335225 -1.613595 2.280640 -2.644336 3.311381
## 2021      0.3335225 -2.420117 3.087162 -3.877806 4.544851
checkresiduals(fit1)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 17.834, df = 10, p-value = 0.05782
## 
## Model df: 0.   Total lags used: 10

ETS Model

# ETS model on the pandemic-adjusted series (specification auto-selected)
fit_ets1<- ets(consumption_ts1) #Residual SD: 1.3983
print(summary(fit_ets1))
## ETS(A,Ad,N) 
## 
## Call:
##  ets(y = consumption_ts1) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.575 
##     phi   = 0.8 
## 
##   Initial states:
##     l = 32.91 
##     b = 4.6211 
## 
##   sigma:  1.3983
## 
##      AIC     AICc      BIC 
## 264.0384 265.7884 276.0824 
## 
## Training set error measures:
##                     ME     RMSE       MAE       MPE     MAPE      MASE
## Training set 0.2675777 1.333213 0.9688798 0.3861036 1.504087 0.6237615
##                    ACF1
## Training set 0.02412095
checkresiduals(fit_ets1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,Ad,N)
## Q* = 12.097, df = 5, p-value = 0.03348
## 
## Model df: 5.   Total lags used: 10

ARIMA Model

# ARIMA on the pandemic-adjusted series; d = 1 forces one difference
fit_arima1<- auto.arima(consumption_ts1, d=1) #Residual SD: 1.246595
print(summary(fit_arima1))
## Series: consumption_ts1 
## ARIMA(1,1,1) with drift 
## 
## Coefficients:
##           ar1     ma1   drift
##       -0.3628  0.8637  1.1434
## s.e.   0.1701  0.0954  0.2246
## 
## sigma^2 estimated as 1.554:  log likelihood=-87.36
## AIC=182.72   AICc=183.53   BIC=190.67
## 
## Training set error measures:
##                        ME     RMSE       MAE        MPE    MAPE      MASE
## Training set -0.003135582 1.200341 0.9507481 0.06764132 1.48257 0.6120884
##                    ACF1
## Training set 0.05812566
checkresiduals(fit_arima1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,1,1) with drift
## Q* = 3.9284, df = 7, p-value = 0.788
## 
## Model df: 3.   Total lags used: 10

#AR model

library(vars)
library(mFilter)
library(tseries)
library(TSstudio)
library(forecast)
library(tidyverse)
VARselect(consumption_ts1,lag.max = 10, type = "const")
## $selection
## AIC(n)  HQ(n)  SC(n) FPE(n) 
##      6      2      2      6 
## 
## $criteria
##                1         2         3         4         5         6         7
## AIC(n) 0.5199406 0.4421863 0.4624644 0.5065880 0.4359571 0.4114818 0.4554496
## HQ(n)  0.5498741 0.4870867 0.5223315 0.5814220 0.5257579 0.5162494 0.5751840
## SC(n)  0.6002367 0.5626305 0.6230566 0.7073283 0.6768454 0.6925182 0.7766341
## FPE(n) 1.6820262 1.5564139 1.5887298 1.6611487 1.5489146 1.5128999 1.5829150
##                8         9        10
## AIC(n) 0.4994564 0.5312357 0.5755493
## HQ(n)  0.6341576 0.6809036 0.7401840
## SC(n)  0.8607889 0.9327163 1.0171779
## FPE(n) 1.6568555 1.7139084 1.7961610
# Fit a fixed-order AR model to the differenced adjusted series.
# NOTE: ar() has no `p` or `type` arguments (that is vars::VAR syntax); the
# original calls silently ignored both. The first of the two original fits
# was also dead code -- AR1 was immediately overwritten by the second call.
# We keep order 6, the choice of the second (surviving) call, which matches
# the AIC/FPE selection from VARselect above.
AR1 <- ar(diff(consumption_ts1), aic = FALSE, order.max = 6) # Residual SD = 1.2907

# BUG FIX: summarise and diagnose the model just fitted (the original
# printed and checked AR, the model from the *first* dataset)
print(summary(AR1)) 
##              Length Class  Mode     
## order         1     -none- numeric  
## ar            1     -none- numeric  
## var.pred      1     -none- numeric  
## x.mean        1     -none- numeric  
## aic          18     -none- numeric  
## n.used        1     -none- numeric  
## n.obs         1     -none- numeric  
## order.max     1     -none- numeric  
## partialacf   17     -none- numeric  
## resid        55     ts     numeric  
## method        1     -none- character
## series        1     -none- character
## frequency     1     -none- numeric  
## call          4     -none- call     
## asy.var.coef  1     -none- numeric
checkresiduals(AR1)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.

# Forecast 10 years ahead with the ARIMA(1,1,1)-with-drift model
fcs_cons1 <- forecast(fit_arima1, h= 10)
autoplot(fcs_cons1)

print(summary(fcs_cons1))
## 
## Forecast method: ARIMA(1,1,1) with drift
## 
## Model Information:
## Series: consumption_ts1 
## ARIMA(1,1,1) with drift 
## 
## Coefficients:
##           ar1     ma1   drift
##       -0.3628  0.8637  1.1434
## s.e.   0.1701  0.0954  0.2246
## 
## sigma^2 estimated as 1.554:  log likelihood=-87.36
## AIC=182.72   AICc=183.53   BIC=190.67
## 
## Error measures:
##                        ME     RMSE       MAE        MPE    MAPE      MASE
## Training set -0.003135582 1.200341 0.9507481 0.06764132 1.48257 0.6120884
##                    ACF1
## Training set 0.05812566
## 
## Forecasts:
##      Point Forecast     Lo 80    Hi 80    Lo 95    Hi 95
## 2020       98.51155  96.91406 100.1090 96.06840 100.9547
## 2021       99.73846  96.85728 102.6196 95.33207 104.1449
## 2022      100.85159  97.28191 104.4213 95.39223 106.3109
## 2023      102.00599  97.80612 106.2059 95.58284 108.4292
## 2024      103.14542  98.41597 107.8749 95.91235 110.3785
## 2025      104.29029  99.07906 109.5015 96.32040 112.2602
## 2026      105.43318  99.78303 111.0833 96.79202 114.0743
## 2027      106.57678 100.51879 112.6348 97.31187 115.8417
## 2028      107.72013 101.28028 114.1600 97.87123 117.5690
## 2029      108.86357 102.06320 115.6639 98.46331 119.2638
plot_forecast(fcs_cons1) # interactive forecast plot (TSstudio)
# Flatten the forecast object to a data frame for manual stitching
forcast_con1 <- fortify(fcs_cons1) 
# Column 3: the observed history -- NOTE(review): its last entry (row 56)
# prints as NA below; confirm against fortify()'s column layout
forcast_con1[1:56, 3]
##  [1] 35.88447 37.17609 39.49620 41.41403 44.47001 47.36249 50.65372 52.38458
##  [9] 56.36573 59.71718 56.20743 57.55541 60.84182 61.98083 65.66493 64.90642
## [17] 61.27084 60.41140 58.11362 59.36699 60.10207 60.47672 62.95522 63.45150
## [25] 66.43129 66.41210 67.65394 66.97134 69.04226 67.50697 71.25237 70.56000
## [33] 73.77546 74.79043 75.12704 77.31022 77.08545 78.84964 78.95014 81.68998
## [41] 84.17259 84.48699 86.01142 86.90470 84.95955 84.55628 88.81948 87.62523
## [49] 90.62271 90.49744 92.14051 94.13754 95.57062 97.49088 98.20498       NA
# Combine observed history (rows 1-56, column 3) with the point forecasts
# (rows 57 onward, column 4) into one date-indexed data frame.
# NOTE(review): row 56 of column 3 is NA per the printed output above --
# confirm whether that slot should be taken from the forecast column instead.
consumption_df1 <- data.frame(date  = forcast_con1$Index, consumption = c(forcast_con1 [ 1:56, 3], forcast_con1 [ 57:nrow(forcast_con1), 4]))
# Alias kept for downstream use
model <- consumption_df1

Now let's add the 2020 drop back in.